#	------------------------------------------------------------------------------
# 	File-Name: 	2_fe_regression.R
#	  Date:  		20/07/2020
#	  Author: 	Setu Pelz, Johannes Urpelainen
#	  Purpose:   	.R files to replicate analysis in:
#					Pelz, Setu and Johannes Urpelainen
#					"Measuring and explaining household access to electrical energy 
#         services: Evidence from rural northern India" Energy Policy.
#         DOI: https://doi.org/10.1016/j.enpol.2020.111782
#
#		All survey data come from:
#					Aklin, Michaël; Cheng, Chao-yo; Ganesan, Karthik; Jain, Abhishek; 
#         Urpelainen, Johannes, 2016, "Access to Clean Cooking Energy and 
#         Electricity: Survey of States in India (ACCESS)", 
#         https://doi.org/10.7910/DVN/0NV9LF, Harvard Dataverse, V1,
#         AND
#         Mani, Sunil; Shahidi, Tauseef; Patnaik, Sasmita; Jain, Abhishek; 
#         Tripathi, Saurabh; Ganesan, Karthik; Aklin, Michaël; Urpelainen, 
#         Johannes; Chindarkar, Namrata; Council on Energy, Environment and 
#         Water; Initiative for Sustainable Energy Policy; National University 
#         of Singapore, 2019, "Access to Clean Cooking Energy and Electricity: 
#         Survey of States in India 2018 (ACCESS 2018)", 
#         https://doi.org/10.7910/DVN/AHFINM, Harvard Dataverse, V1,
#	------------------------------------------------------------------------------

# Load Necessary Packages ------------------------------------------------------

#install.packages("pacman")
library(pacman)

### Note, the following lines install packages, double-check if this desired ###

# Tidyverse
p_load(dplyr, forcats, ggplot2, srvyr, tidyr, naniar, patchwork, stringr)

# Estimation
p_load(fixest, margins, etable, ggeffects, clubSandwich)

# Misc
p_load(here, readstata13, survey, patchwork, corrplot, stargazer, readxl)

# Custom function
source(here::here("R", "custom_functions.R"))

# Read in ACCESS panel dataset -------------------------------------------------
source(here("R","load_data.R"))

# This sets the factors to desired global levels

levels_srv <- c("Lighting", "ICTs", "Entertainment", "Space Cooling", "Refrigeration", 
                "Thermal Loads", "Mechanical Loads", "Cooking")

# Set variables lists  ---------------------------------------------------------

bin_vars <- c("educ10th", "scst", "puccahouse", "bplaay")

num_vars <- c("hhsize", "exp", "villcomspc", "timetocity")

cont_vars <- c("duration_day", "reliability")

tier_vars <- c("elec_duration_tier", "elec_reliability_tier")

# Summary stats OLS regression dataset -----------------------------------------

svydata <- pool %>%
  as_survey_design(id=hh, strata=village, weight = weights, nest = TRUE)

# Note: quite slow due to weighted aggregates
summary_a <- svydata %>% 
  group_by(wave) %>% 
  select(wave, num_vars, bin_vars, cont_vars, tier_vars, "srv_tot", "useelec", "satisfied") %>% 
  srvyr::summarise_at(
    c(num_vars, bin_vars, tier_vars, cont_vars, "srv_tot", "useelec", "satisfied"),
    list(ME = ~round(survey_mean(., na.rm = T),2),
         SD = ~round(survey_sd(., na.rm = T),2))
  )

summary_a <- summary_a %>% 
  select(-matches("_se")) %>% 
  gather(Variable, value, -wave) %>% 
  separate(col = Variable, into = c("Variable", "Stat"), sep = -3) %>%
  pivot_wider(names_from = Stat) %>% 
  rename("Mean" = "_ME", "SD" = "_SD") %>% 
  mutate(Variable = as.character(Variable)) %>% 
  rename(Wave = wave)

summary_b <- 
  pool %>% 
  select(wave, num_vars, bin_vars, cont_vars, tier_vars, "srv_tot", "useelec", "satisfied") %>% 
  gather(Variable, value, -c(wave)) %>% 
  group_by(wave, Variable) %>% 
  summarise_at(
    c("value"),
    list(Min = ~round(min(., na.rm = T),2),
         Max = ~round(max(., na.rm = T),2),
         Missing = ~sum(is.na(.)))
  ) %>% 
  ungroup() %>% 
  mutate(Variable = as.character(Variable)) %>% 
  rename(Wave = wave)

summary <- merge(summary_a, summary_b) %>% 
  mutate(Variable = fct_recode(
    factor(Variable, levels = c("srv_tot", "duration_day", "elec_duration_tier", 
                                "reliability", "elec_reliability_tier", 
                                "puccahouse", "educ10th", "exp", "timetocity", 
                                "villcomspc", "hhsize", "bplaay", 
                                "scst", "useelec", "satisfied")),
    "Electricity: Yes" = "useelec",
    "Satisfied: Yes" = "satisfied",
    "Total Distinct Energy Services" = "srv_tot",
    "Daily Electricity Hours" = "duration_day",
    "Availability Tier" = "elec_duration_tier",
    "Monthly Outage Days" = "reliability",
    "Reliability Tier" = "elec_reliability_tier",
    "House Structure: Pucca" = "puccahouse",
    "Education: 10th Grade +" = "educ10th",
    "Monthly Expenditure" = "exp",
    "Time to city" = "timetocity",
    "Village Community Spaces" = "villcomspc",
    "Household Size" = "hhsize",
    "BPL/AAY: True" = "bplaay",
    "SC/ST: True" = "scst")
  ) %>% 
  arrange(Variable) %>% 
  mutate(Wave = ifelse(Wave == 0, "2014", "2018"))

stargazer(summary %>% filter(Wave == "2014"), summary = F, digits = 2, float = F,
          rownames = F, out = here("Manuscript", "Tables", "summary_data_2014.tex"))

stargazer(summary %>% filter(Wave == "2018"), summary = F, digits = 2, float = F,
          out = here("Manuscript", "Tables", "summary_data_2018.tex"), rownames = F)

rm(summary_a, summary_b)

# Correlation plots FE Regression dataset ---------------------------------------

cor <- cor(pool %>% 
             dplyr::select(cont_vars, tier_vars, num_vars, bin_vars, wave, srv_tot, "satisfied") %>% 
             na.omit() %>% 
             rename("Total Distinct Energy Services" = "srv_tot",
                    "Daily Electricity Hours" = "duration_day",
                    "Availability Tier" = "elec_duration_tier",
                    "Monthly Outage Days" = "reliability",
                    "Reliability Tier" = "elec_reliability_tier",
                    "House Structure: Pucca" = "puccahouse",
                    "Education: 10th Grade +" = "educ10th",
                    "Monthly Expenditure" = "exp",
                    "Time to city" = "timetocity",
                    "Village Community Spaces" = "villcomspc",
                    "Household Size" = "hhsize",
                    "BPL/AAY: True" = "bplaay",
                    "SC/ST: True" = "scst",
                    "Survey Wave: 2018" = "wave",
                    "Satisfied: Yes" = "satisfied"))

png(height=800, width=600, pointsize = 15, 
    file= here("Manuscript","Figures","8_cor.png"))

corrplot::corrplot(cor, type = "lower", 
                   order = "alphabet",
                   tl.col = "black", diag = F, win.asp = 1)

dev.off()

rm(cor)

# FE Regression Total Different Services: Duration -----------------------------

srvtot_statefe <- feols(srv_tot ~ duration_day + 
                          reliability + hhsize + lnexp +
                          educ10th + scst + puccahouse + bplaay + timetocity + villcomspc,
                        fixef = c("wave", "state"), data = pool, weights = pool$weights)

srvtot_distfe <- feols(srv_tot ~ duration_day + 
                         reliability + hhsize + lnexp +
                         educ10th + scst + puccahouse + bplaay + timetocity  + villcomspc,
                       fixef = c("wave", "district"), data = pool, weights = pool$weights)


srvtot_villfe <- feols(srv_tot ~ duration_day + 
                         reliability + hhsize + lnexp +
                         educ10th + scst + puccahouse + bplaay,
                       fixef = c("wave", "village"), data = pool, weights = pool$weights)


srvtot_hhfe <- feols(srv_tot ~ duration_day + 
                       reliability + hhsize + lnexp +
                       educ10th + scst + puccahouse + bplaay,
                     fixef = c("wave", "hh"), data = pool, weights = pool$weights)

# Model lists
foels_lost <- list(srvtot_statefe, srvtot_distfe, srvtot_villfe,
                   srvtot_hhfe)

esttex(foels_lost, 
       se = "cluster",
       cluster = "village",
       digits = 3,
       dict = c(duration_day = "Daily Electricity Hours", reliability = "Monthly Outage Days",
                quality_low = "Low Voltage Days", quality_fluc = "Voltage Fluctuation Days",
                wave = "Year", srv_tot = "Total Distinct Energy Services",
                villcomspc = "Village Community Spaces", 
                hhsize = "Household Size", lnexp = "Log Monthly Expenditure",
                timetocity = "Time To City", educ10th = "Education: At least 10th",
                scst = "SC/ST: Yes", bplaay = "BPL/AAY: Yes", 
                puccahouse = "House Structure: Pucca", state = "State",
                district = "District", village = "Village", hh = "Household"),
       fitstat = c("r2"),
       fixef_sizes = T,
       float = F,
       replace = T,
       file = here("Manuscript", "Tables", "FE_mainresults_full.tex"))

# FE Regression Total Different Services: CEEW Tier ----------------------------

srvtot_statefe <- feols(srv_tot ~ elec_duration_tier + 
                          elec_reliability_tier + hhsize + lnexp +
                          educ10th + scst + puccahouse + bplaay + timetocity  + villcomspc,
                        fixef = c("wave", "state"), data = pool, weights = pool$weights)

srvtot_distfe <- feols(srv_tot ~ elec_duration_tier + 
                         elec_reliability_tier + hhsize + lnexp +
                         educ10th + scst + puccahouse + bplaay + timetocity  + villcomspc,
                       fixef = c("wave", "district"), data = pool, weights = pool$weights)


srvtot_villfe <- feols(srv_tot ~ elec_duration_tier + 
                         elec_reliability_tier + hhsize + lnexp +
                         educ10th + scst + puccahouse + bplaay,
                       fixef = c("wave", "village"), data = pool, weights = pool$weights)


srvtot_hhfe <- feols(srv_tot ~ elec_duration_tier + 
                       elec_reliability_tier + hhsize + lnexp +
                       educ10th + scst + puccahouse + bplaay,
                     fixef = c("wave", "hh"), data = pool, weights = pool$weights)

# Model lists
foels_lost <- list(srvtot_statefe, srvtot_distfe, srvtot_villfe,
                   srvtot_hhfe)

esttex(foels_lost, 
       se = "cluster",
       cluster = "village",
       digits = 3,
       dict = c(elec_duration_tier = "Availability Tier", elec_reliability_tier = "Reliability Tier", 
                wave = "Year",  srv_tot = "Total Distinct Energy Services",
                useelec = "Use Electricity: Yes", villcomspc = "Village Community Spaces",
                hhsize = "Household Size", lnexp = "Log Monthly Expenditure",
                timetocity = "Time to city", educ10th = "Education: At least 10th",
                scst = "SC/ST: Yes", bplaay = "BPL/AAY: Yes", 
                puccahouse = "House Structure: Pucca", state = "State",
                district = "District", village = "Village", hh = "Household"),
       fitstat = c("r2"),
       fixef_sizes = T,
       float = F,
       replace = T,
       file = here("Manuscript", "Tables", "FE_mainresults_full_tier.tex"))

# Logit models: Dataset preparation --------------------------------------------

pool_logit <- pool %>% 
  mutate_at(
    c(tier_vars, cont_vars, "puccahouse", "educ10th", "exp", "villcomspc", "hhsize",
      "timetocity", "bplaay", "scst"), 
    .funs = c(scale = ~arm::rescale(.)))

levels_logit <- c("duration_day_scale", "puccahouse_scale", "educ10th_scale", 
                  "exp_scale", "villcomspc_scale", "hhsize_scale", 
                  "timetocity_scale", "bplaay_scale", "scst_scale")

pool_svyglm <- pool_logit %>% 
  select(state, wave, village, hh, srv_light:srv_cook, satisfied, levels_logit, weights,
         reliability_scale, elec_duration_tier_scale, elec_reliability_tier_scale,
         bin_vars, num_vars, cont_vars, tier_vars, exp, lnexp) %>% 
  na.omit()

svydata_glm <- as_survey_design(pool_svyglm, id=hh, strata=village, weight = weights, 
                            nest = TRUE)

SD_summary <- pool_svyglm %>% 
  select(bin_vars, hhsize, exp, villcomspc, timetocity, duration_day) %>%
  summarise_all(~sd(.)) %>% 
  gather(Variable, SD) %>% 
  mutate(Variable = fct_recode(fct_rev(factor(Variable)),
                               "Daily Electricity Hours" = "duration_day",
                               "House Structure: Pucca" = "puccahouse",
                               "Education: 10th Grade +" = "educ10th",
                               "Monthly Expenditure" = "exp",
                               "Time to city" = "timetocity",
                               "Village Community Spaces" = "villcomspc",
                               "Household Size" = "hhsize",
                               "BPL/AAY: True" = "bplaay",
                               "SC/ST: True" = "scst"),
         SD = round(SD, 2))

stargazer(SD_summary, summary = F, digits = 2, float = F,
          rownames = F, out = here("Manuscript", "Tables", "SD_summary.tex"))

# Logit models: Services -------------------------------------------------------

# Set up survey design for weighted glm

glm_light <- svyglm(srv_light ~ duration_day_scale + timetocity_scale + 
                      villcomspc_scale + hhsize_scale + exp_scale + 
                      educ10th_scale + scst_scale + puccahouse_scale + 
                      bplaay_scale + factor(wave) + factor(state),
                   family = "quasibinomial", design = svydata_glm)

coef_light <- coef_test(glm_light, vcov = "CR2", 
                        cluster = pool_svyglm$village, test = "Satterthwaite")

glm_ict <- svyglm(srv_ict ~ duration_day_scale + timetocity_scale + 
                    villcomspc_scale + hhsize_scale + exp_scale + 
                    educ10th_scale + scst_scale + puccahouse_scale + 
                    bplaay_scale + factor(wave) + factor(state),
                  family = "quasibinomial", design = svydata_glm)

coef_ict <- coef_test(glm_ict, vcov = "CR2", 
                      cluster = pool_svyglm$village, test = "Satterthwaite")

glm_ent <- svyglm(srv_ent ~ duration_day_scale + timetocity_scale + 
                    villcomspc_scale + hhsize_scale + exp_scale + 
                    educ10th_scale + scst_scale + puccahouse_scale + 
                    bplaay_scale + factor(wave) + factor(state),
                  family = "quasibinomial", design = svydata_glm)

coef_ent <- coef_test(glm_ent, vcov = "CR2", 
                      cluster = pool_svyglm$village, test = "Satterthwaite")

glm_spch <- svyglm(srv_spch ~ duration_day_scale + timetocity_scale + 
                     villcomspc_scale + hhsize_scale + exp_scale + 
                     educ10th_scale + scst_scale + puccahouse_scale + 
                     bplaay_scale + factor(wave) + factor(state),
                   family = "quasibinomial", design = svydata_glm)

coef_spch <- coef_test(glm_spch, vcov = "CR2", 
                       cluster = pool_svyglm$village, test = "Satterthwaite")

glm_frdg <- svyglm(srv_frdg ~ duration_day_scale + timetocity_scale + 
                     villcomspc_scale + hhsize_scale + exp_scale + 
                     educ10th_scale + scst_scale + puccahouse_scale + 
                     bplaay_scale + factor(wave) + factor(state),
                   family = "quasibinomial", design = svydata_glm)

coef_frdg <- coef_test(glm_frdg, vcov = "CR2", 
                       cluster = pool_svyglm$village, test = "Satterthwaite")

glm_mech <- svyglm(srv_mech ~ duration_day_scale + timetocity_scale + 
                     villcomspc_scale + hhsize_scale + exp_scale + 
                     educ10th_scale + scst_scale + puccahouse_scale + 
                     bplaay_scale + factor(wave) + factor(state),
                   family = "quasibinomial", design = svydata_glm)

coef_mech <- coef_test(glm_mech, vcov = "CR2", 
                       cluster = pool_svyglm$village, test = "Satterthwaite")

glm_thrm <- svyglm(srv_thrm ~ duration_day_scale + timetocity_scale + 
                     villcomspc_scale + hhsize_scale + exp_scale + 
                     educ10th_scale + scst_scale + puccahouse_scale + 
                     bplaay_scale + factor(wave) + factor(state),
                   family = "quasibinomial", design = svydata_glm)

coef_thrm <- coef_test(glm_thrm, vcov = "CR2", 
                       cluster = pool_svyglm$village, test = "Satterthwaite")

glm_cook <- svyglm(srv_cook ~ duration_day_scale + timetocity_scale + 
                     villcomspc_scale + hhsize_scale + exp_scale + 
                     educ10th_scale + scst_scale + puccahouse_scale + 
                     bplaay_scale + factor(wave) + factor(state),
                   family = "quasibinomial", design = svydata_glm)

coef_cook <- coef_test(glm_cook, vcov = "CR2", 
                       cluster = pool_svyglm$village, test = "Satterthwaite")

## Model and coefficient lists with state fixed effects  ##
coefs <- list(coef_light[,2], coef_ict[,2], coef_ent[,2], coef_spch[,2],
              coef_frdg[,2], coef_mech[,2], coef_thrm[,2], coef_cook[,2])

glms <- list(glm_light, glm_ict, glm_ent, glm_spch, glm_frdg, glm_mech,
             glm_thrm, glm_cook)

# Log-odds Stargazer output 

stargazer(header = F,
          glms,
          se = coefs,
          keep = levels_logit,
          order = levels_logit,
          covariate.labels = c("Daily Electricity Hours (2 std)", "House Structure: Pucca", 
                               "Education: 10th Grade +", "Log Monthly Expenditures (2 std)", 
                               "Village Community Spaces (2 std)", "Household Size (2 std)",  
                               "Time to City (2 std)", "BPL/AAY: True",
                               "SC/ST: True"),
          dep.var.labels.include = T, 
          dep.var.labels = levels_srv,
          no.space = T,
          suppress.errors = F,
          float = F,
          add.lines = list(c("Fixed Effects", rep("State and Wave", 8))),
          out = here("Manuscript", "Tables", "logit_mainresults_logodds.tex"))

# Logit average marginal effects : Services ------------------------------------

logit_ame <- data.frame(matrix(nrow = 0, ncol = 8))
colnames(logit_ame) <- c("factor", "AME", "SE", "z", "p", "lower", "upper", "model")

for (i in glms) {
  ame <- summary(margins(i, variables = levels_logit, design = svydata_glm))
  ame$model <- names(i[["model"]][1])
  ame <- ame %>% filter(!(grepl(pattern = "state", factor)))
  logit_ame <- rbind(logit_ame, ame)
}

logit_ame_labelled <- logit_ame %>% 
  filter(!(factor %in% c("wave", "state"))) %>% 
  mutate(
    AME = as.numeric(AME),
    model = case_when(
      model == "srv_ict" ~ "ICTs",
      model == "srv_ent" ~ "Entertainment",
      model == "srv_light" ~ "Lighting",
      model == "srv_spch" ~ "Space Cooling",
      model == "srv_thrm" ~ "Thermal Loads",
      model == "srv_frdg" ~ "Refrigeration",
      model == "srv_mech" ~ "Mechanical Loads",
      model == "srv_cook" ~ "Cooking"),
    model = factor(model, levels = levels_srv),
    factor = fct_recode(fct_rev(factor(factor, levels = levels_logit)),
                     "Daily Electricity Hours (2 std)" = "duration_day_scale",
                     "House Structure: Pucca" = "puccahouse_scale",
                     "Education: 10th Grade +" = "educ10th_scale",
                     "Monthly Expenditure (2 std)" = "exp_scale",
                     "Time to city (2 std)" = "timetocity_scale",
                     "Village Community Spaces (2 std)" = "villcomspc_scale",
                     "Household Size (2 std)" = "hhsize_scale",
                     "BPL/AAY: True" = "bplaay_scale",
                     "SC/ST: True" = "scst_scale"))

logit_ame_longer <- logit_ame_labelled %>% 
  transmute(Factor = fct_rev(factor),
            model = model,
            "AME_SE" = paste0(round(AME,3), " (",round(SE,3),") ")) %>% 
  spread(key = model, value = AME_SE) %>% 
  select(Factor, levels_srv) %>% 
  arrange(Factor)

stargazer(logit_ame_longer, summary = F, float = F, rownames = F, digits = 3, 
          out = here("Manuscript", "Tables", "logit_mainresults_ame.tex"))

logit_ame_labelled %>% 
  mutate(
    lower_corr = AME - (1.96*SE),
    upper_corr = AME + (1.96*SE)
  ) %>% 
  ggplot(aes(factor, AME, ymin = lower_corr, ymax = upper_corr)) +
  geom_hline(yintercept = 0, linetype = 2, size = 0.5, colour = "red") +
  geom_point() +
  geom_errorbar() +
  coord_flip() +
  scale_y_continuous(breaks = seq(-0.5,1.5,0.05)) +
  facet_wrap(~model, ncol = 4) +
  theme_bw() +
  theme(strip.background = element_rect(fill = NA, color = "black"),
        plot.caption = element_text(hjust = 1),
        axis.ticks.x = element_blank()) +
  labs(x = NULL, 
       y = "Average Marginal Effect")

ggsave(here("Manuscript","Figures","logit_ame_scale.png"),
       device = "png", width = 10, height = 6*0.68, units = "in")

# Logit models: Satisfaction ---------------------------------------------------

glm_dur <- survey::svyglm(satisfied ~ duration_day + reliability + timetocity + 
                            villcomspc + hhsize + lnexp + 
                            educ10th + scst + puccahouse + 
                            bplaay + factor(wave) + factor(state),
                          family = "quasibinomial", design = svydata_glm)

coef_dur <- coef_test(glm_dur, vcov = "CR2", 
                      cluster = pool_svyglm$village, test = "Satterthwaite")

glm_tier <- survey::svyglm(satisfied ~ elec_duration_tier + elec_reliability_tier + timetocity + 
                             villcomspc + hhsize + lnexp + 
                             educ10th + scst + puccahouse + 
                             bplaay + factor(wave) + factor(state),
                           family = "quasibinomial", design = svydata_glm)

coef_tier <- coef_test(glm_tier, vcov = "CR2", 
                       cluster = pool_svyglm$village, test = "Satterthwaite")

## Model and coefficient lists with state fixed effects  ##
coefs <- list(coef_dur[,2], coef_tier[,2])

glms <- list(glm_dur, glm_tier)

# Stargazer plot
stargazer(header = F,
          glms,
          se = coefs,
          keep = c(cont_vars, tier_vars, c("hhsize", "lnexp", "villcomspc", "timetocity"), bin_vars),
          dep.var.labels = c("Supply Satisfaction", "Supply Satisfaction"), 
          order = c(cont_vars, tier_vars, c("hhsize", "lnexp", "villcomspc", "timetocity"), bin_vars),
          covariate.labels = c("Daily Electricity Hours", "Monthly Outage Days",
                               "Availability Tier", "Reliability Tier",
                               "Time to City", "Village Community Spaces",
                               "Household Size", "Log Monthly Expenditure",
                               "Education: At least 10th", "SC/ST: Yes",
                               "House Structure: Pucca", "BPL/AAY: Yes"
                               ),
          no.space = T,
          suppress.errors = F,
          float = F,
          omit.stat = c("aic","ll"),
          add.lines = list(c("Fixed Effects: State (6)", "Yes", "Yes")),
          out = here("Manuscript", "Tables", "FE_satisfaction.tex"))

logit_satis_me <- data.frame(matrix(nrow = 0, ncol = 6))
colnames(logit_satis_me) <- c("x", "predicted", "conf.low", "conf.high", "model", "var")

marginal_effects <- rbind(rbind(
  ggpredict(glm_dur, terms = "duration_day", 
            vcov.fun = "vcovCR", vcov.type = "CR2", 
            vcov.args = list(cluster = pool_svyglm$village)) %>% 
    mutate(var = "Daily Electricity Hours",
           conf.low = predicted - 1.96 * std.error,
           conf.high = predicted + 1.96 * std.error),
  ggpredict(glm_dur, terms = "reliability", 
            vcov.fun = "vcovCR", vcov.type = "CR2", 
            vcov.args = list(cluster = pool_svyglm$village)) %>% 
    mutate(var = "Monthly Outage Days",
           conf.low = predicted - 1.96 * std.error,
           conf.high = predicted + 1.96 * std.error)),
  rbind(
    ggpredict(glm_tier, terms = "elec_duration_tier", 
              vcov.fun = "vcovCR", vcov.type = "CR2", 
              vcov.args = list(cluster = pool_svyglm$village)) %>% 
      mutate(var = "Availability Tier",
             conf.low = predicted - 1.96 * std.error,
             conf.high = predicted + 1.96 * std.error),
    ggpredict(glm_tier, terms = "elec_reliability_tier", 
              vcov.fun = "vcovCR", vcov.type = "CR2", 
              vcov.args = list(cluster = pool_svyglm$village)) %>% 
      mutate(var = "Reliability Tier",
             conf.low = predicted - 1.96 * std.error,
             conf.high = predicted + 1.96 * std.error)))

marginal_effects %>% 
  mutate(var = factor(var, levels = 
                        c("Daily Electricity Hours", "Availability Tier",
                          "Monthly Outage Days", "Reliability Tier"))) %>% 
  ggplot(aes(x, predicted, ymin = conf.low, ymax = conf.high)) +
  geom_line() +
  geom_ribbon(alpha = 0.5) +
  facet_wrap(~var, scale = "free_x") +
  scale_y_continuous(labels = scales::percent) +
  coord_cartesian(ylim = c(0,1)) +
  labs(x = "Attribute Value",
       y = "Modelled Probability of Supply Satisfaction") +
  theme_bw() +
  theme(strip.background = element_rect(fill = NA, color = "black"),
        plot.caption = element_text(hjust = 1),
        axis.ticks.x = element_blank())

ggsave(here("Manuscript","Figures","satis_logit.png"),
       device = "png", width = 6, height = 6*0.68, units = "in")

satis_ame <- rbind(
  dur_ame <- summary(margins(glm_dur, variables = c("duration_day", "reliability"), 
                             design = svydata_glm,
                             vcov = vcovCR(glm_dur, type = "CR2", 
                                    cluster = pool_svyglm$village))),
  tier_ame <- summary(margins(glm_tier, variables = c("elec_duration_tier",
                                                      "elec_reliability_tier"),
                              design = svydata_glm, 
                              vcov = vcovCR(glm_tier, type = "CR2", 
                                               cluster = pool_svyglm$village)))
) %>% 
  rename(Variable = factor)

satis_ame <- satis_ame %>%
  mutate(
    lower = AME - (1.96*SE),
    upper = AME + (1.96*SE)
  ) %>% 
  dplyr::mutate_at(c("AME", "SE", "z", "p", "lower", "upper"), round, 3) %>% 
  mutate(Variable = case_when(
    grepl(Variable, pattern = "duration_tier") ~ "Availability Tier",
    grepl(Variable, pattern = "reliability_tier") ~ "Reliability Tier",
    grepl(Variable, pattern = "duration") ~ "Daily Electricity Hours",
    grepl(Variable, pattern = "reliability") ~ "Monthly Outage Days")
  )

stargazer(satis_ame, summary = F, rownames = F, float = F, digits = 3,
          out = here("Manuscript", "Tables", "AME_satis.tex"))
